{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Link Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from anomalous_vertices_detection.datasets.academia import load_data\n",
    "from anomalous_vertices_detection.graph_learning_controller import *\n",
    "from anomalous_vertices_detection.learners.sklearner import SkLearner"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading graph...\n",
      "Data loaded.\n",
      "Generating 77017 vertices.\n",
      "77017 fake users generated.\n"
     ]
    }
   ],
   "source": [
    "labels = {\"neg\": \"Real\", \"pos\": \"Fake\"}\n",
    "\n",
    "my_graph, dataset_config = load_data(labels_map=labels, simulate_fake_vertices=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "glc = GraphLearningController(SkLearner(labels=labels), dataset_config)\n",
    "output_folder = \"../output/\"\n",
    "test_path, training_path = os.path.join(output_folder, dataset_config.name + \"_test.csv\"),\\\n",
    "                                        os.path.join(output_folder, dataset_config.name + \"_train.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "GraphSampler is a class that samples vertices and edges from graphs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "sampler = GraphSampler(my_graph, 3, 10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`sampler.generate_sample_for_unlabeled_links(n, m)` returns n existing edges and m simulated edges."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Graph loaded\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "20000feature [00:07, 2657.10feature/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Features were written to: ../output/academia_config_train.csv\n"
     ]
    }
   ],
   "source": [
    "glc.extract_features_for_set(my_graph, sampler.generate_sample_for_unlabeled_links(10000, 10000), training_path,\n",
    "                             fast_link_features[my_graph.is_directed])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Graph loaded\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "20000feature [00:06, 2916.30feature/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Features were written to: ../output/academia_config_test.csv\n"
     ]
    }
   ],
   "source": [
    "glc.extract_features_for_set(my_graph, sampler.generate_sample_for_labeled_links(10000, 10000), test_path,\n",
    "                             fast_link_features[my_graph.is_directed])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Setting training and test sets\n",
      "Existing files were loaded.\n",
      "Training 10-fold validation: {'auc': 0.97412786249999994, 'tnr': 0.89589999999999992, 'recall': 0.97070000000000012, 'precision': 0.90319095500463198, 'fpr': 0.10410000000000001, 'accuracy': 0.93330000000000002}\n",
      "Test evaluation: {'auc': 0.99919369728869412, 'tnr': 0.8947191727935006, 'recall': 0.9996230682246513, 'precision': 0.45818935729094679, 'fpr': 0.10528082720649948, 'accuracy': 0.90329839704069048}\n"
     ]
    }
   ],
   "source": [
    "glc.evaluate_classifier(my_graph, test_path, training_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
